Start R in the bash terminal and run the following lines to install the libraries.
install.packages("e1071")
install.packages("caret")
install.packages("rworldmap")
install.packages("maptools")
install.packages("rgeos")
install.packages("reshape")
install.packages("randomForest")
library(ggplot2)
library(rgdal)
## Loading required package: sp
## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
##
## rgdal: version: 1.5-25, (SVN revision 1143)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
## Overwritten PROJ_LIB was /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
library(raster)
library(rgeos)
## rgeos version: 0.5-7, (SVN revision 676)
## GEOS runtime version: 3.8.1-CAPI-1.13.3
## Please note that rgeos will be retired by the end of 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
## Linking to sp version: 1.4-5
## Polygon checking: TRUE
library(reshape)
library(rasterVis)
## Loading required package: lattice
library(dismo)
library(InformationValue)
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:raster':
##
## getData
## This is mgcv 1.8-36. For overview type 'help("mgcv-package")'.
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
library(e1071)
##
## Attaching package: 'e1071'
## The following object is masked from 'package:raster':
##
## interpolate
library(caret)
##
## Attaching package: 'caret'
## The following objects are masked from 'package:InformationValue':
##
## confusionMatrix, precision, recall, sensitivity, specificity
set.seed(30)
We will use Montane woodcreper (Lepidocolaptes lacrymiger) as example species.
This species has a large range, occurring from the coastal cordillera of Venezuela along the Andes south to south-east Peru and central Bolivia.
Let’s suppose that you have done field work and you have collected the bird presence in the Lepidocolaptes_lacrymiger_allpoints.csv file
points_field <- read.csv("./geodata/shp/Lepidocolaptes_lacrymiger_allpoints.csv")
str(points_field)
## 'data.frame': 3438 obs. of 3 variables:
## $ lon : num -76.2 -76.2 -74.3 -74.3 -76.1 ...
## $ lat : num 3.98 3.93 4.61 4.61 4.75 ...
## $ scientific_name: chr "Lepidocolaptes_lacrymiger" "Lepidocolaptes_lacrymiger" "Lepidocolaptes_lacrymiger" "Lepidocolaptes_lacrymiger" ...
Morover you also download aditional points data from the https://www.gbif.org/ .
# gbif_points = gbif('Lepidocolaptes' , 'lacrymiger' , download=T , geo=T , ext=c(-82,-59,-21,14) , removeZeros=TRUE )
# save(gbif_points , file="~/SE_data/exercise/geodata/SDM/gbif_points.Rdata")
load("./geodata/SDM/gbif_points.Rdata")
points=rbind.data.frame(
data.frame(lat=gbif_points$lat,lon=gbif_points$lon),
data.frame(lat=points_field$lat,lon=points_field$lon)
)
str(points)
## 'data.frame': 34835 obs. of 2 variables:
## $ lat: num 0.02978 5.3731 4.76848 0.00634 6.40736 ...
## $ lon: num -78.7 -75.9 -75.5 -78.7 -75.7 ...
rCld <- raster("./geodata/cloud/SA_meanannual.tif")
# compute min and max
rCld = setMinMax(rCld)
rCldIA <- raster("./geodata/cloud/SA_intra.tif")
rCldIA = setMinMax(rCldIA)
rElv <- raster("./geodata/dem/SA_elevation_mn_GMTED2010_mn_msk.tif")
rElv = setMinMax(rElv)
rVeg <- raster("./geodata/vegetation/SA_tree_mn_percentage_GFC2013.tif")
rVeg = setMinMax(rVeg)
rElv
## class : RasterLayer
## dimensions : 8400, 5880, 49392000 (nrow, ncol, ncell)
## resolution : 0.008333333, 0.008333333 (x, y)
## extent : -83, -34, -56, 14 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : SA_elevation_mn_GMTED2010_mn_msk.tif
## names : SA_elevation_mn_GMTED2010_mn_msk
## values : -400, 6736 (min, max)
birdrange <- readOGR("./geodata/shp", "cartodb-query")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/erinstearns/Documents/git_repos/SE_data/exercise/geodata/shp", layer: "cartodb-query"
## with 2 features
## It has 7 fields
plot(rElv)
points(points$lon, points$lat, col = "red", cex = .3)
plot(birdrange,add=TRUE)
# indicate that these data are presences
presence <- matrix(1,nrow(points),1)
points <- cbind(points,presence)
head(points)
## lat lon presence
## 1 0.029785 -78.68224 1
## 2 5.373100 -75.89100 1
## 3 4.768477 -75.45283 1
## 4 0.006336 -78.67635 1
## 5 6.407356 -75.66417 1
## 6 11.107720 -74.04844 1
# building spatial dataframe
coordinates(points)=c('lon','lat')
str(points)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 34835 obs. of 1 variable:
## .. ..$ presence: num [1:34835] 1 1 1 1 1 1 1 1 1 1 ...
## ..@ coords.nrs : int [1:2] 2 1
## ..@ coords : num [1:34835, 1:2] -78.7 -75.9 -75.5 -78.7 -75.7 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr [1:2] "lon" "lat"
## ..@ bbox : num [1:2, 1:2] -81.1 -18.8 -62.6 11.2
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:2] "lon" "lat"
## .. .. ..$ : chr [1:2] "min" "max"
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
## .. .. ..@ projargs: chr NA
# assign projection
projection(points) <- "+proj=longlat +datum=WGS84"
Loading eBird sampling dataset, in order to obtain “absence” data
# link to global sampling raster
# first crop with the gdal and then load the cropversion
system("gdal_translate -projwin -82 14 -59 -21 -co COMPRESS=DEFLATE -co ZLEVEL=9 ./geodata/SDM/eBirdSampling_filtered.tif ./geodata/SDM/eBirdSampling_filtered_crop.tif")
## Warning in system("gdal_translate -projwin -82 14 -59 -21 -co COMPRESS=DEFLATE
## -co ZLEVEL=9 ./geodata/SDM/eBirdSampling_filtered.tif ./geodata/SDM/
## eBirdSampling_filtered_crop.tif"): error in running command
gsampling <- raster("./geodata/SDM/eBirdSampling_filtered_crop.tif")
# assign projection
projection(gsampling)="+proj=longlat +datum=WGS84"
gsampling
## class : RasterLayer
## dimensions : 4200, 2760, 11592000 (nrow, ncol, ncell)
## resolution : 0.008333333, 0.008333333 (x, y)
## extent : -82, -59, -20.99999, 14 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : eBirdSampling_filtered_crop.tif
## names : eBirdSampling_filtered_crop
# convert to points within data region
samplingp <- as(gsampling,"SpatialPointsDataFrame")
samplingp <- samplingp[samplingp$eBirdSampling_filtered_crop>0,]
str(samplingp)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 17420 obs. of 1 variable:
## .. ..$ eBirdSampling_filtered_crop: num [1:17420] 1 2 1 2 1 1 1 4 1 2 ...
## ..@ coords.nrs : num(0)
## ..@ coords : num [1:17420, 1:2] -61 -61 -61 -61 -61 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr [1:2] "x" "y"
## ..@ bbox : num [1:2, 1:2] -82 -21 -59 14
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:2] "x" "y"
## .. .. ..$ : chr [1:2] "min" "max"
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
## .. .. ..@ projargs: chr "+proj=longlat +datum=WGS84 +no_defs"
## .. .. ..$ comment: chr "GEOGCRS[\"unknown\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.25722"| __truncated__
head(samplingp)
## eBirdSampling_filtered_crop
## 2518 1
## 2519 2
## 2520 1
## 2523 2
## 2525 1
## 2529 1
# edit column names
colnames(samplingp@data) <- c("observation")
samplingp$presence=0
plot(samplingp, col="green",pch=21,cex=.5)#absences
plot(points, col="red",add=TRUE)#presences
plot(birdrange, col="cyan",add=TRUE)#species range
summary(samplingp)
## Object of class SpatialPointsDataFrame
## Coordinates:
## min max
## x -81.97917 -59.00417
## y -20.99583 13.99584
## Is projected: FALSE
## proj4string : [+proj=longlat +datum=WGS84 +no_defs]
## Number of points: 17420
## Data attributes:
## observation presence
## Min. : 1.000 Min. :0
## 1st Qu.: 1.000 1st Qu.:0
## Median : 2.000 Median :0
## Mean : 5.935 Mean :0
## 3rd Qu.: 3.000 3rd Qu.:0
## Max. :1412.000 Max. :0
combine presence and non-presence point datasets
pdata <- rbind(points[,"presence"],samplingp[,"presence"])
pdata@data[,c("lon","lat")] <- coordinates(pdata)
table(pdata$presence)
##
## 0 1
## 17420 34835
env <- stack(c(rCld,rCldIA,rElv,rVeg))
env
## class : RasterStack
## dimensions : 8400, 5880, 49392000, 4 (nrow, ncol, ncell, nlayers)
## resolution : 0.008333333, 0.008333333 (x, y)
## extent : -83, -34, -56, 14 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## names : SA_meanannual, SA_intra, SA_elevation_mn_GMTED2010_mn_msk, SA_tree_mn_percentage_GFC2013
## min values : 329, 0, -400, 0
## max values : 10000, 3815, 6736, 10000
# rename layers for convenience
vars <- c("cld","cld_ia","elev","forest")
names(env) <- vars
# visual result
options(repr.plot.width=15, repr.plot.height=9)
# check out the plot
plot(env)
Scaling and centering the environmental variables to zero mean and variance of 1
senv <- scale(env[[vars]])
senv
## class : RasterBrick
## dimensions : 8400, 5880, 49392000, 4 (nrow, ncol, ncell, nlayers)
## resolution : 0.008333333, 0.008333333 (x, y)
## extent : -83, -34, -56, 14 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : r_tmp_2022-12-12_175839_9108_30290.grd
## names : cld, cld_ia, elev, forest
## min values : -3.9349032, -1.6168665, -1.0685917, -0.5696309
## max values : 2.800472, 4.348538, 6.673642, 2.178319
# this operation is quite long. Would be possible to do in gdal? how?
hist(env)
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.
hist(senv)
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.
Annotate the point records with the scaled environmental data
df.xact <- raster::extract(senv,pdata,sp=T)
df.xact <- (df.xact[! is.na(df.xact$forest),])
## convert to 'long' format for easier plotting
df.xactl <- reshape::melt(df.xact@data,id.vars=c("lat","lon","presence"),variable.name="variable")
head(df.xactl)
## lat lon presence variable value
## 1 0.029785 -78.68224 1 cld 2.551142
## 2 5.373100 -75.89100 1 cld 2.587358
## 3 4.768477 -75.45283 1 cld 2.646556
## 4 0.006336 -78.67635 1 cld 2.616609
## 5 6.407356 -75.66417 1 cld 1.810119
## 6 11.107720 -74.04844 1 cld 2.322707
tail(df.xactl)
## lat lon presence variable value
## 209015 -20.97083 -70.13750 0 forest -0.5696309
## 209016 -20.97083 -68.56250 0 forest -0.5696309
## 209017 -20.97916 -70.15417 0 forest -0.5696309
## 209018 -20.98749 -68.55417 0 forest -0.5696309
## 209019 -20.99583 -70.13750 0 forest -0.5696309
## 209020 -20.99583 -67.42917 0 forest -0.5696309
ggplot(df.xactl,aes(x=value,y=presence))+facet_wrap(~variable)+
geom_point()+
stat_smooth(method = "lm", formula = y ~ x + I(x^2), col="red")+
geom_smooth(method="gam",formula=y ~ s(x, bs = "cs")) +
theme(text = element_text(size = 20))
## Warning: Removed 404 rows containing non-finite values (`stat_smooth()`).
## Removed 404 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 404 rows containing missing values (`geom_point()`).
df.xact <- as.data.frame(df.xact)
df.xact$grp <- kfold(df.xact,2)
head(df.xact)
## presence lon lat cld cld_ia elev forest
## 1 1 -78.68224 0.029785 2.551142 -1.2900593 1.081788 1.6430179
## 2 1 -75.89100 5.373100 2.587358 -1.4260987 1.266230 1.8469158
## 3 1 -75.45283 4.768477 2.646556 -1.3119507 3.890734 -0.2396022
## 4 1 -78.67635 0.006336 2.616609 -1.2947503 1.364961 1.7699732
## 5 1 -75.66417 6.407356 1.810119 -0.7943755 2.127684 0.5647226
## 6 1 -74.04844 11.107720 2.322707 -0.9710704 1.992064 0.5636234
## lon.1 lat.1 grp
## 1 -78.68224 0.029785 1
## 2 -75.89100 5.373100 2
## 3 -75.45283 4.768477 2
## 4 -78.67635 0.006336 2
## 5 -75.66417 6.407356 1
## 6 -74.04844 11.107720 1
mdl.glm <- glm(presence~cld+cld_ia*I(cld_ia^2)+elev*I(elev^2)+forest, family=binomial(link=logit), data=subset(df.xact,grp==1))
summary(mdl.glm)
##
## Call:
## glm(formula = presence ~ cld + cld_ia * I(cld_ia^2) + elev *
## I(elev^2) + forest, family = binomial(link = logit), data = subset(df.xact,
## grp == 1))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9714 -0.0263 0.2261 0.3539 4.9487
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.97053 0.14431 -41.373 < 2e-16 ***
## cld 1.21530 0.08013 15.167 < 2e-16 ***
## cld_ia 0.22476 0.09130 2.462 0.0138 *
## I(cld_ia^2) 0.34291 0.03835 8.941 < 2e-16 ***
## elev 4.99094 0.19347 25.797 < 2e-16 ***
## I(elev^2) -1.87208 0.12204 -15.339 < 2e-16 ***
## forest 1.02948 0.03866 26.630 < 2e-16 ***
## cld_ia:I(cld_ia^2) -0.16774 0.03372 -4.974 6.55e-07 ***
## elev:I(elev^2) 0.16445 0.02280 7.212 5.53e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 32879 on 25922 degrees of freedom
## Residual deviance: 11136 on 25914 degrees of freedom
## (205 observations deleted due to missingness)
## AIC: 11154
##
## Number of Fisher Scoring iterations: 8
Calculate estimates of p(occurrence) for each cell. We can use the predict function in the raster package to make the predictions across the full raster grid and save the output.
pred.glm1 <- predict(mdl.glm,df.xact[which(df.xact$grp==1),vars],type="response")
pred.glm2 <- predict(mdl.glm,df.xact[which(df.xact$grp==2),vars],type="response")
plotROC(df.xact[which(df.xact$grp==1),"presence"],pred.glm1)
plotROC(df.xact[which(df.xact$grp==2),"presence"],pred.glm2)
p1 <- raster::predict(senv,mdl.glm,type="response")
Plot the results as a map:
options(repr.plot.width=8, repr.plot.height=9)
gplot(p1)+geom_tile(aes(fill=value))+
scale_fill_gradientn(
colours=c("blue","green","yellow","orange","red"),
na.value = "transparent")+
geom_polygon(aes(x=long,y=lat,group=group),
data=fortify(birdrange),fill="transparent",col="darkred")+
geom_point(aes(x = lon, y = lat), data = subset(df.xact,presence==1),col="black",size=0.5)+
coord_equal()
## Regions defined for each Polygons
The library caret allow an easy implementation of the Cross Validation for several models.
ctrl <- trainControl(method = "cv", number = 10)
mdl.glm.cv <- train( as.factor(presence) ~cld+cld_ia*I(cld_ia^2)+elev*I(elev^2)+forest,
family=binomial(link=logit), data = df.xact, method = "glm", trControl = ctrl,
metric='Accuracy', na.action=na.exclude)
mdl.glm.cv
## Generalized Linear Model
##
## 52255 samples
## 4 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 46665, 46667, 46667, 46666, 46667, 46665, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9236658 0.8212671
Predict in terms of presence/absence
pred.mdl.glm.cv.raw <- raster::predict(senv,mdl.glm.cv ,type="raw")
plot(pred.mdl.glm.cv.raw)
Predict and plotting in terms of probability
pred.mdl.glm.cv.prob1 <- raster::predict(senv,mdl.glm.cv ,type="prob" , index=1)
plot(pred.mdl.glm.cv.prob1)
pred.mdl.glm.cv.prob2 <- raster::predict(senv,mdl.glm.cv ,type="prob" , index=2)
plot(pred.mdl.glm.cv.prob2)
Random Forest is an ensemble learning method for classification, regression and other tasks that operate by constructing a multitude of decision trees at training time and outputting the class that is the mode of the classes (classification) or mean/average prediction (regression) of the individual trees (source https://en.wikipedia.org/wiki/Random_forest)
The randomForest packages (https://cran.r-project.org/web/packages/randomForest/index.html) allows to run reggression or classification case in R. (check also ranger: A Fast Implementation of Random Forests https://cran.r-project.org/web/packages/ranger/index.html )
mdl.rf <- randomForest(as.factor(presence) ~ cld + cld_ia + elev +forest, data=subset(df.xact,grp==1),
na.action=na.exclude)
mdl.rf
##
## Call:
## randomForest(formula = as.factor(presence) ~ cld + cld_ia + elev + forest, data = subset(df.xact, grp == 1), na.action = na.exclude)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 6.38%
## Confusion matrix:
## 0 1 class.error
## 0 7436 1118 0.13069909
## 1 537 16832 0.03091715
(This may take a while to run)
pred.rf.resp <- raster::predict(senv , mdl.rf , type='response')
plot(pred.rf.resp)
(This may take a while to run)
pred.rf.prob1 <- raster::predict(senv , mdl.rf , type='prob' , index=1)
pred.rf.prob0 <- raster::predict(senv , mdl.rf , type='prob' , index=2)
plot(pred.rf.prob1)
plot(pred.rf.prob0)
Create a raste with 10 km resoultion (0.083333333333)
occ = raster(nrows=420, ncols=276, xmn=-82, xmx=-59, ymn=-21, ymx=14, crs="+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" , vals=0)
occ
## class : RasterLayer
## dimensions : 420, 276, 115920 (nrow, ncol, ncell)
## resolution : 0.08333333, 0.08333333 (x, y)
## extent : -82, -59, -21, 14 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : memory
## names : layer
## values : 0, 0 (min, max)
pointcount = function(r, pts){
# make a raster of zeroes like the input
r2 = r
r2[] = 0
# get the cell index for each point and make a table:
counts = table(cellFromXY(r,pts))
# fill in the raster with the counts from the cell index:
r2[as.numeric(names(counts))] = counts
return(r2)
}
occ_raster <- pointcount(occ, points)
plot(occ_raster)
occ_point = rasterToPoints(occ_raster , spatial=TRUE)
str(occ_point)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 115920 obs. of 1 variable:
## .. ..$ layer: num [1:115920] 0 0 0 0 0 0 0 0 0 0 ...
## ..@ coords.nrs : num(0)
## ..@ coords : num [1:115920, 1:2] -82 -81.9 -81.8 -81.7 -81.6 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr [1:2] "x" "y"
## ..@ bbox : num [1:2, 1:2] -82 -21 -59 14
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:2] "x" "y"
## .. .. ..$ : chr [1:2] "min" "max"
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
## .. .. ..@ projargs: chr "+proj=longlat +datum=WGS84 +no_defs"
## .. .. ..$ comment: chr "GEOGCRS[\"unknown\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.25722"| __truncated__
Sampling the occurrence points by eliminating the extremes.
occ_point_sel = subset(occ_point , (layer > 0 & layer < 100) )
hist(occ_point_sel@data$layer)
occ_point_sel_predictor <- raster::extract(senv,occ_point_sel,sp=T)
str(occ_point_sel_predictor)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 1098 obs. of 5 variables:
## .. ..$ layer : num [1:1098] 1 3 1 80 12 1 1 1 1 1 ...
## .. ..$ cld : num [1:1098] -0.06682 -0.00901 0.26539 1.9306 1.72097 ...
## .. ..$ cld_ia: num [1:1098] 2.652 2.53 -0.0313 0.1172 0.3408 ...
## .. ..$ elev : num [1:1098] -0.497 -0.517 -0.498 0.195 0.689 ...
## .. ..$ forest: num [1:1098] -0.229 1.413 -0.293 1.963 1.639 ...
## ..@ coords.nrs : num(0)
## ..@ coords : num [1:1098, 1:2] -74.2 -74.1 -72.4 -74.1 -74 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr [1:2] "x" "y"
## ..@ bbox : num [1:2, 1:2] -81.1 -18.8 -62.6 11.2
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:2] "x" "y"
## .. .. ..$ : chr [1:2] "min" "max"
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
## .. .. ..@ projargs: chr "+proj=longlat +datum=WGS84 +no_defs"
## .. .. ..$ comment: chr "GEOGCRS[\"unknown\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.25722"| __truncated__
mdl.rf.occ <- randomForest( layer ~ cld + cld_ia + elev + forest , data=occ_point_sel_predictor )
mdl.rf.occ
##
## Call:
## randomForest(formula = layer ~ cld + cld_ia + elev + forest, data = occ_point_sel_predictor)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 1
##
## Mean of squared residuals: 211.8068
## % Var explained: 0.78
pred.rf.occ <- raster::predict(senv , mdl.rf.occ , type='response')
plot(pred.rf.occ)